import hashlib

import scrapy
from ..items import MyFileItem
import re
import json


class SsoSup(scrapy.Spider):
    """Spider for the "Laws of Brunei" listing page on agc.gov.bn.

    ``parse`` walks the rows of the listing table and schedules one request
    per link found in a row, carrying the row's chapter label in the request
    meta. ``detailed`` turns every link that answered with HTTP 200 into a
    ``MyFileItem`` whose download URL is the fetched URL itself.
    """

    name = 'brn_agc_act'
    allowed_domains = ['agc.gov.bn']
    # Listing page of the Brunei Attorney General's Chambers.
    start_urls = ['http://www.agc.gov.bn/AGC%20Site%20Pages/Laws%20of%20Brunei.aspx']
    # Kept for backward compatibility; nothing in this class reads it.
    id = 0

    def parse(self, response):
        """Parse the listing page and follow every link in each table row.

        Yields:
            One request per usable ``<a href>`` in a row, with the row's
            chapter label stored under ``meta['ChapterNo']`` and
            ``self.detailed`` as callback.
        """
        rows = response.xpath('//*[@id="layoutsTable"]/tbody/tr/td/div/div/table/tbody/tr')
        for row in rows:
            # Chapter number: all text fragments of the first cell, stripped
            # and glued together without separators.
            chapter_text = ''.join(
                fragment.strip() for fragment in row.xpath('./td[1]//text()').getall()
            )
            # NOTE(review): this produces two spaces after "Chapter"; left
            # unchanged because downstream data may rely on the exact value.
            chapter_no = 'Chapter ' + ' ' + chapter_text

            for raw_href in row.xpath('.//a/@href').getall():
                href = raw_href.strip()
                if not href:
                    continue
                # Resolve against the page URL instead of gluing the href
                # onto a fixed host: absolute hrefs and hrefs without a
                # leading slash would otherwise turn into invalid URLs.
                detail_url = response.urljoin(href)
                if not detail_url.startswith(('http://', 'https://')):
                    # e.g. "javascript:" or "mailto:" links - not fetchable.
                    self.logger.debug('Skipping non-HTTP link: %s', href)
                    continue
                yield response.follow(detail_url, callback=self.detailed, dont_filter=True,
                                      meta={'ChapterNo': chapter_no})

    def detailed(self, response):
        """Build the item for one followed link.

        Reads the chapter label from ``response.meta['ChapterNo']``. Nothing
        is yielded unless the response status is exactly 200.

        Yields:
            A single ``MyFileItem``; its file name and ``SYSID`` are derived
            from the MD5 hex digest of the response URL.
        """
        if response.status != 200:
            return

        item = MyFileItem()

        # URL of the fetched document; it doubles as the download URL.
        detail_url = response.url
        # One digest serves both the file name and the unique ID.
        url_digest = hashlib.md5(detail_url.encode('utf-8')).hexdigest()

        item['detailUrl'] = detail_url
        # Download path
        item['downloadUrl'] = detail_url
        # Download URLs (left empty here)
        item['file_urls'] = ''
        # Chapter label handed over by parse()
        chapter_no = response.meta['ChapterNo']
        item['country'] = 'Brunei'
        item['website'] = 'agcgovbn'
        item['modular'] = 'act'
        # Format of the downloaded file
        item['ext'] = 'pdf'

        # Name of the downloaded file: "f" + MD5 of the URL
        file_name = 'f' + url_digest if detail_url else ''
        item['fina'] = file_name

        # Chapter name
        item['chapNo'] = ''

        # Title
        item['title'] = ''
        # Original content
        item['htmls'] = ''
        item['htmlUrl'] = ''

        item['abstract'] = ''
        item['abstractUrl'] = ''

        item['LegalName'] = ''
        # NOTE(review): key spelling ("Organizaation") must match the field
        # declared on MyFileItem, so it is left as is.
        item['Organizaation'] = ''
        item['PublishDate'] = ''
        item['EffectiveDate'] = ''
        item['SortA'] = 'LAWCOUNTRYWL'
        item['SortB'] = 'LANGUAGEYY'
        item['SortC'] = ''
        item['SortD'] = ''
        item['SORTE'] = ''
        item['SORTF'] = ''
        item['Keyword'] = ''
        item['SORTG'] = ''
        item['ChapNo'] = ''
        item['Articles'] = ''
        item['Chapter'] = chapter_no
        item['Section'] = ''
        item['SYS_FLD_DIGITFILENAME'] = file_name
        item['FileUrl'] = ''
        item['DownLoadUrl'] = detail_url
        item['DownLoadWebNameC'] = '文莱总检查院办公室'
        # NOTE(review): value looks truncated ("Brune"); confirm with the
        # data consumers before changing it.
        item['DownLoadWebNameE'] = "Attorney General Chambers - Laws of Brune"
        # Unique ID: MD5 of the URL
        item['SYSID'] = url_digest
        item['Website'] = 'Acts'
        item['Isconversion'] = '0'
        item['Revisionmark'] = ''

        yield item
